import re
import time

import pandas as pd

from finance.selenium_test import getBrowser


def baidu(company_name):
    """Look up the first listed shareholder of a company on aiqicha.baidu.com.

    Searches the site for ``company_name``, opens the first search hit, parses
    the tables of the detail page and reads the first row of the '股东名称'
    (shareholder name) column of the second table.

    Args:
        company_name: Company name to search for. It is URL-escaped before
            being placed in the query string.

    Returns:
        The first space-separated token of that cell that is longer than six
        characters, or ``None`` when no token is long enough.

    Raises:
        IndexError: If the search page yields no matching link, or the detail
            page has fewer than two tables.
        KeyError: If the selected table has no '股东名称' column.
        Other exceptions raised by the browser or by ``pd.read_html`` are
        propagated unchanged.
    """
    # Imported locally so this function does not depend on edits to the
    # file's import header.
    from io import StringIO
    from urllib.parse import quote

    browser = getBrowser()
    try:
        # Escape the name so characters such as '&', '#' or spaces cannot
        # truncate or alter the query string.
        url = 'https://aiqicha.baidu.com/s?q=' + quote(company_name, safe='')
        browser.get(url)
        time.sleep(2)  # wait 2 seconds so the page has time to finish loading
        data = browser.page_source

        # NOTE(review): the data-v-387da8b0 attribute looks like a
        # build-generated identifier; this pattern stops matching if the site
        # changes it -- confirm when the search step starts failing.
        p_href = '<h3 data-v-387da8b0="" class="title"><a data-v-387da8b0="" target="_blank" href="(.*?)"'
        href = re.findall(p_href, data)
        # href[0] raises IndexError when the search returned nothing.
        url2 = 'https://aiqicha.baidu.com' + href[0]
        browser.get(url2)
        time.sleep(2)  # wait 2 seconds so the page has time to finish loading
        data = browser.page_source
    finally:
        # Always shut the simulated browser down, including when a lookup
        # fails part-way, so failed lookups do not leak browser processes.
        browser.quit()

    # Newer pandas versions deprecate passing literal HTML; a file-like
    # wrapper is accepted by old and new versions alike.
    table = pd.read_html(StringIO(data))
    df = table[1]

    company = df['股东名称'][0]
    company_split = company.split(' ')
    for i in company_split:
        # A length threshold is used rather than testing for a
        # "Co., Ltd."-style suffix, because some shareholders (e.g. SASAC,
        # the state asset regulator) do not carry that suffix.
        if len(i) > 6:
            return i
    return None

if __name__ == '__main__':
    # Walk up the ownership chain: every lookup result becomes the next query.
    company = '华能信托'
    visited = {company}  # names already queried, used to detect ownership cycles
    while True:
        try:
            company = baidu(company)  # iterate: feed the result back in
        except Exception:
            # A failed lookup marks the end of the chain. Catching Exception
            # (not a bare except) keeps Ctrl-C and SystemExit working.
            break
        # Stop when no usable shareholder name was found, or when the chain
        # loops back to a company that was already queried.
        if company is None or company in visited:
            break
        visited.add(company)
        print(company)